from pprint import pprint

import requests, re
from bs4 import BeautifulSoup

"""
Beautiful Soup bs4 ---->用于格式化HTML数据
"""

# Page to scrape: the Douban "Top 250" movie listing.
url = "https://movie.douban.com/top250"
response = requests.get(
    url,
    # Browser-like User-Agent (the product token was misspelled "Moozilla").
    # NOTE(review): presumably needed because the site refuses the default
    # python-requests agent -- confirm against the live site.
    headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
    # Without a timeout, requests waits forever on a stalled connection.
    timeout=10,
)
# Fail loudly on 4xx/5xx instead of parsing an error page further down.
response.raise_for_status()
# Force UTF-8 when decoding response.text rather than relying on the
# encoding requests guesses from the response headers.
response.encoding = "utf-8"

"""
print(response.text)
print(response.status_code)
if response.ok:
    movies = re.findall('<span class="title">([\u4e00-\u9fa5]+)</span>', response.text)
    print(movies)
else:
    print("获取数据失败")
"""

# Build the parse tree from the decoded page using Python's built-in
# "html.parser" backend.
soup = BeautifulSoup(response.text, features="html.parser")
# Show the first <p> element of the document (prints None if there is none).
print(soup.find("p"))

# Collect every <span class="inq"> element on the page. find_all is the
# supported spelling; findAll is a deprecated alias kept by bs4.
quotes = soup.find_all("span", attrs={"class": "inq"})
# The result is a list of Tag objects shaped like:
#   [<span class="inq">...</span>,
#    <span class="inq">...</span>,
#    <span class="inq">...</span>]
# pprint(quotes)  # uncomment to dump the raw tags

for quote in quotes:
    # Print only the text content. get_text() also works when the span holds
    # nested markup, where .string would be None and print the word "None".
    print(quote.get_text())
